*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0


* This do file calls geopandas to generate grid shapefiles with predicted employment and other data for Global Cities
* These employment grid shapefiles are save as zip archivesn for distribution via the GitHub toolkit
* Execution requires a Python working environment with packages mentioned at the beginning of the Python code
* Should the code not run through in Stata, the part between "python:" and "end" can be executed in Python
* In this case, you must manually set the root directory in line 63. 

cd "$root"

* Basline grid data
	u "$temp/125CitiesPLs/grid125_PL_output.dta", clear
			
	* Merge photo data
	merge 1:1 grid_x grid_y using "$data_125cities/PHOTOS/PHOTOCOUNT_250M.dta" ,   	
		tab _m
		drop if _m == 2
		drop _m
		ren count PHOTOCOUNT
		distinct metro_id if PHOTOCOUNT!=0 & PHOTOCOUNT!=.
		local N_photo_c=r(ndistinct)
											
	* MERGE POPDENS	
	merge 1:1 grid_x grid_y using "$data/125 GLOBAL CITIES/POP/GRID_POP",     
		drop if _m == 2
		drop _m		
										
* Gen cell area
	gen Area = Carea / 1000000
	gen PhotoDens = PHOTOCOUNT / Area
	replace PhotoDens = 0 if PhotoDens == .
	ren devle Devle
	ren popdens PopDens
	egen metro_emp = sum(cell_PS_emp), by(metro_id)
	gen PSEmpSh = cell_PS_emp/metro_emp*100
	egen metro_photo = sum(PHOTOCOUNT), by(metro_id)
	gen PhotoSh = PHOTOCOUNT / metro_photo * 100
	keep metro_id	cell_id  Devle PSEmpSh PLID_Global  PhotoDens PopDens PhotoSh

	* Do this by grid125
	levelsof metro_id, local(metro_ids)
		foreach x of local metro_ids{
		export delimited using "_Work/TEMP/grid125/grid125_info_`x'.csv" if metro_id == `x', replace
		}
		
********************************************************************************

python:

import os
import pandas as pd
import geopandas as gpd
import zipfile

# Set the root directory to the Stata working directory
root_directory = os.getcwd()


# Define input/output directories
metro_list_file = os.path.join(root_directory, r'_Data/125 GLOBAL CITIES/METRO_LEVEL_COVARIATES/metrolist125.csv')
data_folder = os.path.join(root_directory, r'_Work/TEMP/grid125')
shapefile_folder = os.path.join(root_directory, r'_Data/125 GLOBAL CITIES/GIS/NEWCELLS')
output_folder = os.path.join(root_directory, r'_Work/Output/Shapes/GlobalCities')

# Ensure the output directory exists
os.makedirs(output_folder, exist_ok=True)

# Read metro_id list
metro_df = pd.read_csv(metro_list_file)
metro_ids = metro_df['metro_id'].tolist()

# Variables to keep from the merged dataset
columns_to_keep = ["metro_id", "cell_id", "devle", "psempsh", "plid_glob", "photo_sh", "pop_dens"]

# Summary counters
processed = 0
skipped = 0
errors = 0

# Loop over each metro_id
for metro_id in metro_ids:
    metro_str = str(metro_id)  # Ensure the code is a string

    # Construct paths for the shapefile and data file
    shapefile_path = os.path.join(shapefile_folder, f'GRID_{metro_str}.shp')
    data_file_path = os.path.join(data_folder, f'grid125_info_{metro_str}.csv')

    # Check if the shapefile exists
    if not os.path.exists(shapefile_path):
        print(f"Shapefile not found for metro_id {metro_str}: {shapefile_path}")
        skipped += 1
        continue

    # Check if the data file exists
    if not os.path.exists(data_file_path):
        print(f"Data file not found for metro_id {metro_str}: {data_file_path}")
        skipped += 1
        continue

    # Load the shapefile
    try:
        gdf = gpd.read_file(shapefile_path)
        gdf.columns = gdf.columns.str.strip().str.lower()  # Standardize column names
        gdf.rename(columns={"unique_cel": "cell_id"}, inplace=True)  # Rename for merging
    except Exception as e:
        print(f"Error reading shapefile for metro_id {metro_str}: {e}")
        errors += 1
        continue

    # Load the data file
    try:
        data_df = pd.read_csv(data_file_path)
        data_df.columns = data_df.columns.str.strip().str.lower()  # Standardize column names
    except Exception as e:
        print(f"Error reading data file for metro_id {metro_str}: {e}")
        errors += 1
        continue

    # Perform the merge
    try:
        merged_gdf = gdf.merge(data_df, on="cell_id", how="inner")
        merged_gdf.rename(columns={
            "plid_global": "plid_glob",
            "photosh": "photo_sh",
            "popdens": "pop_dens"
        }, inplace=True)
    except Exception as e:
        print(f"Error merging data for metro_id {metro_str}: {e}")
        errors += 1
        continue

    # Filter to keep only required columns
    try:
        merged_gdf = merged_gdf[["geometry"] + columns_to_keep]
    except KeyError as e:
        print(f"Error filtering columns for metro_id {metro_str}: {e}")
        errors += 1
        continue

    # Save the shapefile
    shapefile_base = os.path.join(output_folder, f'grid_{metro_str}')
    try:
        merged_gdf.to_file(f"{shapefile_base}.shp")
        print(f"Saved merged shapefile for metro_id {metro_str} to: {shapefile_base}.shp")
    except Exception as e:
        print(f"Error saving shapefile for metro_id {metro_str}: {e}")
        errors += 1
        continue

    # Create ZIP archive
    try:
        zip_filename = f"{shapefile_base}.zip"
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            for ext in ['.shp', '.shx', '.dbf', '.prj', '.cpg']:
                shapefile_component = f"{shapefile_base}{ext}"
                if os.path.exists(shapefile_component):
                    zipf.write(shapefile_component, arcname=os.path.basename(shapefile_component))
        print(f"Created ZIP archive for metro_id {metro_str}: {zip_filename}")
    except Exception as e:
        print(f"Error creating ZIP archive for metro_id {metro_str}: {e}")
        errors += 1
        continue

    processed += 1

# Summary of processing
print(f"Processing completed: {processed} metros processed, {skipped} skipped, {errors} errors.")


end

* Script ends
